"""
这是用于连接本地MongoDB数据库，并在取出微博数据的同时进行清洗，以便后续上传至geomesa

@author stan / 梁超
@since 0.1.0
@Date 2022/03/28
"""
import pandas as pd
import pymongo

from datetime import datetime
from com.sikong.geoTransform.transformer import GeoTransformer

class GetMongoData:
    """
    Read Weibo records from a MongoDB collection and prepare them for upload.

    The class bundles three steps: loading a collection into a pandas
    DataFrame, converting GCJ-02 ("Mars") coordinates to WGS-84, and cleaning
    the text columns.

    Instances can be used as context managers; the MongoDB connection is
    closed when the ``with`` block exits.
    """

    # One-pass character mapping for free-text columns: newlines are removed
    # and ASCII commas become full-width commas.
    # NOTE(review): presumably so the text cannot break a comma-delimited
    # export -- confirm against the upload step.
    _TEXT_TRANSLATION = str.maketrans({'\n': None, ',': '，'})

    # Timestamp layout of the raw ``CreatedAt`` strings. The ``+0800`` offset
    # is matched literally, so the parsed datetimes are naive.
    _CREATED_AT_FORMAT = '%a %b %d %H:%M:%S +0800 %Y'

    def __init__(self, collection: str, dataBase: str,
                 host: str = 'localhost', port: int = 27017):
        """
        Open the connection and select the database and collection.

        :param collection: name of the collection (table) to read
        :param dataBase: name of the database that holds the collection
        :param host: MongoDB host name, defaults to ``localhost``
        :param port: MongoDB port, defaults to ``27017``
        """
        # Keep the name separately: ``self.collection`` is rebound to the
        # pymongo Collection object by ``openDB``.
        self._collectionName = collection
        self.collection = collection
        self.openDB(dataBase, host, port)

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, excType, excValue, traceback):
        """Close the MongoDB connection; exceptions are not suppressed."""
        self.closeDB()
        return False

    def openDB(self, dataBase, host: str = 'localhost', port: int = 27017):
        """
        Connect to MongoDB and bind ``self.db`` and ``self.collection``.

        :param dataBase: name of the database to open
        :param host: MongoDB host name, defaults to ``localhost``
        :param port: MongoDB port, defaults to ``27017``
        """
        self.connect = pymongo.MongoClient(host, port)
        self.db = self.connect[dataBase]
        # Look the collection up by its stored name so that calling openDB a
        # second time does not try to index the database with a Collection.
        self.collection = self.db[self._collectionName]

    def extractDB(self) -> pd.DataFrame:
        """
        Load every document of the collection into a DataFrame.

        :return: one row per document; an empty collection yields an empty
                 DataFrame without columns
        """
        documents = list(self.collection.find())
        return pd.DataFrame(documents)

    def dataTrasnform(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Convert the GCJ-02 ``lon``/``lat`` columns of the raw data to WGS-84.

        The input frame is left untouched. The returned frame keeps the index
        and the remaining columns of ``data``, drops ``lon`` and ``lat`` and
        appends ``wgs_lon`` and ``wgs_lat`` as the last two columns.

        :param data: raw data containing ``lon`` and ``lat`` columns
        :return: data with corrected coordinates
        :raises KeyError: if ``lon`` or ``lat`` is missing
        """
        wgs_lons = []
        wgs_lats = []
        for lon, lat in zip(data['lon'].tolist(), data['lat'].tolist()):
            wgs_lon, wgs_lat = GeoTransformer.gcj02towgs84(lon, lat)
            wgs_lons.append(wgs_lon)
            wgs_lats.append(wgs_lat)

        # Assign the converted values positionally instead of concatenating a
        # second frame: concat aligns on index labels and would misalign rows
        # (and pad with NaN) whenever ``data`` has a non-default index.
        result = data.drop(columns=['lon', 'lat'])
        result['wgs_lon'] = wgs_lons
        result['wgs_lat'] = wgs_lats

        return result

    # Correctly spelled alias; the original (misspelled) name is kept for
    # existing callers.
    dataTransform = dataTrasnform

    @staticmethod
    def _isMissing(value) -> bool:
        """Return True for None, pandas NA and float NaN."""
        if value is None or value is pd.NA:
            return True
        # NaN is the only float that is not equal to itself.
        return isinstance(value, float) and value != value

    @classmethod
    def _cleanText(cls, value):
        """Strip newlines and replace ASCII commas; keep missing values as is."""
        if cls._isMissing(value):
            # Leave it missing so the final fillna turns it into 'None'
            # instead of the literal text 'nan'.
            return value
        return str(value).translate(cls._TEXT_TRANSLATION)

    @staticmethod
    def _extractSource(value):
        """Return the text between the first '>' and the following '<'."""
        if not isinstance(value, str):
            return 'None'
        start = value.find('>') + 1
        end = value.find('<', 1)
        if end == -1:
            # No closing tag: keep the rest of the string rather than
            # silently dropping its last character.
            end = len(value)
        return value[start:end]

    def dataClean(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Clean the data with pandas so it can be uploaded to GeoMesa.

        * ``Text`` and ``UserDescription``: newlines removed, ASCII commas
          replaced by full-width commas.
        * ``Source``: reduced to the text enclosed by the HTML tag; values
          that are not strings become ``'None'``.
        * ``CreatedAt``: parsed into ``datetime`` objects.
        * Every remaining missing value is replaced by the string ``'None'``.

        The input frame is not modified.

        :param data: data with corrected coordinates
        :return: cleaned data
        :raises TypeError: if a ``CreatedAt`` value is not a string
        :raises ValueError: if a ``CreatedAt`` value does not match the
                            expected timestamp layout
        """
        cleaned = data.assign(
            Text=data['Text'].apply(self._cleanText),
            UserDescription=data['UserDescription'].apply(self._cleanText),
            Source=data['Source'].apply(self._extractSource),
            CreatedAt=data['CreatedAt'].apply(
                lambda raw: datetime.strptime(raw, self._CREATED_AT_FORMAT)
            ),
        )

        return cleaned.fillna('None')

    def closeDB(self):
        """
        Close the connection to the MongoDB database.
        """
        self.connect.close()